6 Microbial metagenomics

6.1 Microbial DNA fraction

6.1.1 Data overview

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(sample_type) %>%
    summarise(mean=mean(singlem_fraction, na.rm=T),sd=sd(singlem_fraction, na.rm=T)) %>% 
    tt()
tinytable_rci3vpku7zr4kxac7e6d
sample_type mean sd
Anal/cloacal swab 0.2598510 0.3407707
Faecal 0.5203527 0.2759433
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(tax_group) %>%
    summarise(mean=mean(singlem_fraction, na.rm=T),sd=sd(singlem_fraction, na.rm=T)) %>% 
    tt()
tinytable_u0jv9yz9r4qs5ylzmp5h
tax_group mean sd
Amphibians 0.5807892 0.1799217
Bats 0.2903304 0.2949997
Birds 0.2777113 0.3360460
Mammals 0.5871579 0.2841097
Reptiles 0.5885336 0.1701545

6.1.2 Statistical test

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    lm(singlem_fraction ~ sample_type * tax_group, data = .)  %>%
    anova() %>%
    tidy()
# A tibble: 4 × 6
  term                     df  sumsq  meansq statistic    p.value
  <chr>                 <int>  <dbl>   <dbl>     <dbl>      <dbl>
1 sample_type               1  19.4  19.4        311.   6.69e- 65
2 tax_group                 4  35.1   8.78       141.   3.70e-106
3 sample_type:tax_group     4   6.96  1.74        27.9  1.46e- 22
4 Residuals              2015 126.    0.0624      NA   NA        

6.1.3 Plot

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(host_percentage= host_bases/bases_post_fastp*100)  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=singlem_fraction, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(0, 1) +
        geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
        stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        facet_grid(~sample_type) +
        labs(y="Host percentage", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/microbialdata_taxa.pdf",width=9, height=4, units="in")
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(host_percentage= host_bases/bases_post_fastp*100)  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=singlem_fraction, x=sample_type, group=sample_type)) +
        stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
        theme_classic() +
        labs(y="Host percentage", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/microbialdata_taxa_all.pdf",width=9, height=4, units="in")

6.2 Domain-adjusted mapping rate

6.2.1 Data summary

left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
    filter(singlem_fraction>0) %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(sample_type) %>%
    summarise(mean=mean(damr, na.rm=T),sd=sd(damr, na.rm=T)) %>% 
    tt()
tinytable_f2mxglcwsmv0r6m9iwuj
sample_type mean sd
Anal/cloacal swab 72.84533 22.25244
Faecal 86.58154 25.00932
left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
    filter(singlem_fraction>0) %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(tax_group) %>%
    summarise(mean=mean(damr, na.rm=T),sd=sd(damr, na.rm=T)) %>% 
    tt()
tinytable_4ujca91azoj8d8bwexox
tax_group mean sd
Amphibians 89.19356 24.84575
Bats 80.94051 24.67483
Birds 67.03815 24.99000
Mammals 91.06092 16.77450
Reptiles 84.52325 31.14150

6.2.2 Statistical test

left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
    filter(singlem_fraction>0) %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    lm(damr ~ sample_type + tax_group, data = .)  %>%
    anova() %>%
    tidy()
# A tibble: 3 × 6
  term           df    sumsq meansq statistic   p.value
  <chr>       <int>    <dbl>  <dbl>     <dbl>     <dbl>
1 sample_type     1   51142. 51142.      90.4  4.42e-21
2 tax_group       4  114714. 28679.      50.7  4.21e-41
3 Residuals    2561 1449606.   566.      NA   NA       

6.2.3 Plot

left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(singlem_fraction>0) %>% 
    ggplot(aes(y=damr, x=sample_type, color=sample_type, fill=sample_type, group=sample_type)) +
        geom_boxplot(outlier.shape = NA) +
        scale_color_manual(values = c("#bdca50", "#AA3377")) +   
        scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
        theme_classic() +
        labs(y="DAMR", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/damr_type.pdf",width=5, height=4, units="in")
left_join(read_tsv("data/mapping.tsv"),
          read_tsv("data/preprocessing.tsv"),
          by="sequence_id") %>%
    left_join(read_tsv("data/sample.tsv"), by="sample_id") %>% 
    mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>% 
    mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(singlem_fraction>0) %>% 
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=singlem_fraction, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
        stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        labs(y="DAMR", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/damr_taxa.pdf",width=9, height=4, units="in")

6.3 Assemblies

6.3.1 Data summary

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    group_by(tax_group) %>%
    summarise(mean=mean(assembly_length, na.rm=T),sd=sd(assembly_length, na.rm=T)) %>% 
    tt()
tinytable_84rgb4euot2chg2ul0wc
tax_group mean sd
Amphibians 116614689 68762523
Bats 31069176 77144825
Birds 8688016 26409919
Mammals 96415766 69841084
Reptiles 139349280 74190452
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    group_by(sample_type) %>%
    summarise(mean=mean(assembly_length, na.rm=T),sd=sd(assembly_length, na.rm=T)) %>% 
    tt()
tinytable_fq8gzammd9mf9ru4xat5
sample_type mean sd
Anal/cloacal swab 13208042 34504175
Faecal 95955306 83010036

6.3.2 Statistical test

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    lm(assembly_length ~ sample_type + tax_group, data = .)  %>%
    anova() %>%
    tidy()
# A tibble: 3 × 6
  term           df   sumsq  meansq statistic    p.value
  <chr>       <int>   <dbl>   <dbl>     <dbl>      <dbl>
1 sample_type     1 1.07e18 1.07e18      247.  8.15e- 52
2 tax_group       4 2.97e18 7.43e17      171.  2.10e-121
3 Residuals    1549 6.73e18 4.34e15       NA  NA        

6.3.3 Plot

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=assembly_length, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(0,400000000)+
        geom_jitter(alpha = 0.3, width=0.3, size=0.5) +
        geom_violin() +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        facet_grid(~sample_type, scale="free") +
        labs(y="Assembly size", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/assemblysize_taxa.pdf",width=9, height=4, units="in")
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=assembly_length, x=sample_type, group=sample_type)) +
          ylim(0,400000000)+
        stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
        theme_classic() +
        labs(y="Assembly size", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/assemblysize_taxa_all.pdf",width=9, height=4, units="in")

6.4 Number of MAGs

6.4.1 Summary statistics

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    group_by(tax_group) %>%
    summarise(mean=mean(assembly_num_bins, na.rm=T),sd=sd(assembly_num_bins, na.rm=T)) %>% 
    tt()
tinytable_5nszlhp1h61kk7fn4x61
tax_group mean sd
Amphibians 23.758389 14.944334
Bats 3.692308 7.451098
Birds 1.582979 5.600340
Mammals 22.062635 16.345908
Reptiles 23.924107 13.736366
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    group_by(sample_type) %>%
    summarise(mean=mean(assembly_num_bins, na.rm=T),sd=sd(assembly_num_bins, na.rm=T)) %>% 
    tt()
tinytable_bvvwjskavky2og2w3nil
sample_type mean sd
Anal/cloacal swab 2.644068 6.971768
Faecal 18.386792 16.099835

6.4.2 Statistical test

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    lm(assembly_num_bins ~ sample_type * tax_group, data = .)  %>%
    anova() %>%
    tidy()
# A tibble: 4 × 6
  term                     df   sumsq meansq statistic    p.value
  <chr>                 <int>   <dbl>  <dbl>     <dbl>      <dbl>
1 sample_type               1  38873. 38873.     259.   6.50e- 54
2 tax_group                 4 119651. 29913.     199.   1.06e-137
3 sample_type:tax_group     4  13549.  3387.      22.5  4.16e- 18
4 Residuals              1545 232279.   150.      NA   NA        

6.4.3 Plot

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(aes(y=assembly_num_bins, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(0,100) +
        geom_jitter(alpha = 0.6, width=0.3, size=0.5) +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        facet_grid(~sample_type, scale="free") +
        labs(y="Number of MAGs", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/numberofbins_taxa.pdf",width=9, height=4, units="in")
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=assembly_num_bins, x=sample_type, group=sample_type)) +
        ylim(0,100) +
        stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
        theme_classic() +
        labs(y="Number of MAGs", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/numberofbins_taxa_all.pdf",width=9, height=4, units="in")

6.5 MAG quality

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=mag_completeness, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(50,100) +
        #geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
        stat_halfeye(adjust = 0.5, width = 0.5, .width = 0,normalize = "groups") +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        labs(y="Number of bins", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/completeness_taxa.pdf",width=5, height=4, units="in")
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=mag_contamination, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        ylim(0,10) +
        #geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
        stat_halfeye(adjust = 0.5, width = 0.5, .width = 0,normalize = "groups") +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        labs(y="Number of bins", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/contamination_taxa.pdf",width=5, height=4, units="in")

6.6 Assemblies vs MAGs

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>% 
    left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    filter(assembly_type == "Individual") %>% 
    ggplot(aes(y=assembly_num_bins, x=assembly_length, color=tax_group)) +
        geom_point(alpha=0.5, size=0.5) +
        scale_color_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        labs(y="Number of bins", x="Assembly length", color="Taxa", fill="Taxa")